SplitWithOverlap

沿指定轴(axis)将输入张量切分为多个输出张量。与标准 Split 不同,该算子允许通过 start_indices 和 end_indices 自定义每个输出块的起始和结束位置,从而支持输出块之间的重叠(Overlap)。

\[\text{对于第 } j \text{ 个输出张量,其在 axis 轴上的第 } k \text{ 个元素对应:}\]
\[Output[j]_{(\dots, k, \dots)} = Input_{(\dots, start\_indices[j] + k, \dots)} \quad \text{其中 } 0 \le k < (end\_indices[j] - start\_indices[j])\]
输入:
  • input - 输入张量数据地址。

  • outputs - 输出张量地址数组(指针数组)。

  • axis - 进行切分的轴索引。

  • input_shape - 输入张量的形状数组。

  • input_ndim - 输入张量的维度数(即 input_shape 数组的长度)。

  • num_split - 输出张量的数量。

  • start_indices - 每个输出张量在切分轴上的起始索引数组。

  • end_indices - 每个输出张量在切分轴上的结束索引数组(不包含该索引,即取值区间为 [start_indices[j], end_indices[j]))。

  • core_mask(int, 可选) - 核掩码(仅适用于共享存储版本)。

输出:
  • outputs - 各个输出张量中填充了切分后的数据。

支持平台:

FT78NE MT7004

备注:

  • FT78NE 支持 int8, int16, int32, fp32, fp64, cplx64, cplx128

  • MT7004 支持 fp16, fp32, int16, int32, cplx64

  • 算子支持不连续切分或有重叠的切分。

  • 每个输出张量在非切分轴上的维度与输入张量保持一致。

共享存储版本:

void i8_split_with_overlap_s(int8_t *input, int8_t *outputs[], int axis, int *input_shape, int input_ndim, int num_split, int *start_indices, int *end_indices, int core_mask)
void i16_split_with_overlap_s(int16_t *input, int16_t *outputs[], int axis, int *input_shape, int input_ndim, int num_split, int *start_indices, int *end_indices, int core_mask)
void i32_split_with_overlap_s(int32_t *input, int32_t *outputs[], int axis, int *input_shape, int input_ndim, int num_split, int *start_indices, int *end_indices, int core_mask)
void hp_split_with_overlap_s(half *input, half *outputs[], int axis, int *input_shape, int input_ndim, int num_split, int *start_indices, int *end_indices, int core_mask)
void fp_split_with_overlap_s(float *input, float *outputs[], int axis, int *input_shape, int input_ndim, int num_split, int *start_indices, int *end_indices, int core_mask)
void dp_split_with_overlap_s(double *input, double *outputs[], int axis, int *input_shape, int input_ndim, int num_split, int *start_indices, int *end_indices, int core_mask)
void c64_split_with_overlap_s(float *input, float *outputs[], int axis, int *input_shape, int input_ndim, int num_split, int *start_indices, int *end_indices, int core_mask)
void c128_split_with_overlap_s(double *input, double *outputs[], int axis, int *input_shape, int input_ndim, int num_split, int *start_indices, int *end_indices, int core_mask)

C调用示例:

 1//FT78NE示例(共享存储)
 2#include "78NE/utils.h"
 3
 4int main() {
 5    float *input = (float *)0xA0000000;
 6    float *out0 = (float *)0xB0000000;
 7    float *out1 = (float *)0xB1000000;
 8    float *outputs[] = {out0, out1};
 9    int input_shape[] = {8, 200, 10};
10    int start_indices[] = {0, 150};
11    int end_indices[] = {80, 200};
12    int core_mask = 0xFF;
13
14    fp_split_with_overlap_s(input, outputs, 1, input_shape, 3, 2, start_indices, end_indices, core_mask);
15    return 0;
16}

私有存储版本:

void i8_split_with_overlap_p(int8_t *input, int8_t *outputs[], int axis, int *input_shape, int input_ndim, int num_split, int *start_indices, int *end_indices)
void i16_split_with_overlap_p(int16_t *input, int16_t *outputs[], int axis, int *input_shape, int input_ndim, int num_split, int *start_indices, int *end_indices)
void i32_split_with_overlap_p(int32_t *input, int32_t *outputs[], int axis, int *input_shape, int input_ndim, int num_split, int *start_indices, int *end_indices)
void hp_split_with_overlap_p(half *input, half *outputs[], int axis, int *input_shape, int input_ndim, int num_split, int *start_indices, int *end_indices)
void fp_split_with_overlap_p(float *input, float *outputs[], int axis, int *input_shape, int input_ndim, int num_split, int *start_indices, int *end_indices)
void dp_split_with_overlap_p(double *input, double *outputs[], int axis, int *input_shape, int input_ndim, int num_split, int *start_indices, int *end_indices)
void c64_split_with_overlap_p(float *input, float *outputs[], int axis, int *input_shape, int input_ndim, int num_split, int *start_indices, int *end_indices)
void c128_split_with_overlap_p(double *input, double *outputs[], int axis, int *input_shape, int input_ndim, int num_split, int *start_indices, int *end_indices)

C调用示例:

 1//MT7004 示例(私有存储)
 2#include <stdio.h>
 3
 4int main() {
 5    float *input = (float *)0x10810000;
 6    float *out0 = (float *)0x10820000;
 7    float *outputs[] = {out0};
 8    int input_shape[] = {4, 10, 5};
 9    int start_idx[] = {0};
10    int end_idx[] = {5};
11
12    fp_split_with_overlap_p(input, outputs, 1, input_shape, 3, 1, start_idx, end_idx);
13    return 0;
14}